{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "93lzZVsMazQN"
      },
      "source": [
        "### 1. Setup Development Environment\n",
        "In our example, we use the [PyTorch Deep Learning AMI](https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-pytorch.html) with already set up CUDA drivers and PyTorch installed. We still have to install the Hugging Face Libraries, including transformers and datasets. Running the following cell will install all the required packages."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9I8Q4Lh-aEIF",
        "outputId": "e7a03c06-8a04-4215-bcaa-8afa0eacdbc9"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: peft==0.2.0 in /usr/local/lib/python3.10/dist-packages (0.2.0)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (1.22.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (23.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (5.9.5)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (6.0)\n",
            "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (2.0.1+cu118)\n",
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (4.27.2)\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from peft==0.2.0) (0.17.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.2.0) (3.12.0)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.2.0) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.2.0) (1.11.1)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.2.0) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.2.0) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.2.0) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.2.0) (3.25.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.2.0) (16.0.5)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.2.0) (0.15.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.2.0) (2022.10.31)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.2.0) (2.27.1)\n",
            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.2.0) (0.13.3)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.2.0) (4.65.0)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers->peft==0.2.0) (2023.4.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft==0.2.0) (2.1.2)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.2.0) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.2.0) (2022.12.7)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.2.0) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.2.0) (3.4)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft==0.2.0) (1.3.0)\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: rouge-score in /usr/local/lib/python3.10/dist-packages (0.1.2)\n",
            "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (2.12.2)\n",
            "Requirement already satisfied: py7zr in /usr/local/lib/python3.10/dist-packages (0.20.5)\n",
            "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.4.0)\n",
            "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score) (3.8.1)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.22.4)\n",
            "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.16.0)\n",
            "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.54.0)\n",
            "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (2.17.3)\n",
            "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.0.0)\n",
            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.4.3)\n",
            "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.20.3)\n",
            "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (2.27.1)\n",
            "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (67.7.2)\n",
            "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (0.7.0)\n",
            "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.8.1)\n",
            "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (2.3.0)\n",
            "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (0.40.0)\n",
            "Requirement already satisfied: texttable in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.6.7)\n",
            "Requirement already satisfied: pycryptodomex>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from py7zr) (3.18.0)\n",
            "Requirement already satisfied: pyzstd>=0.14.4 in /usr/local/lib/python3.10/dist-packages (from py7zr) (0.15.7)\n",
            "Requirement already satisfied: pyppmd<1.1.0,>=0.18.1 in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.0.0)\n",
            "Requirement already satisfied: pybcj>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.0.1)\n",
            "Requirement already satisfied: multivolumefile>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from py7zr) (0.2.3)\n",
            "Requirement already satisfied: brotli>=1.0.9 in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.0.9)\n",
            "Requirement already satisfied: inflate64>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from py7zr) (0.3.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from py7zr) (5.9.5)\n",
            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard) (5.3.0)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard) (0.3.0)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard) (4.9)\n",
            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard) (1.3.1)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard) (2022.12.7)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard) (3.4)\n",
            "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard) (2.1.2)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (8.1.3)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (1.2.0)\n",
            "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (2022.10.31)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (4.65.0)\n",
            "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard) (0.5.0)\n",
            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard) (3.2.2)\n"
          ]
        }
      ],
      "source": [
        "# install Hugging Face Libraries\n",
        "!pip install \"peft==0.2.0\"\n",
        "!pip install \"transformers==4.27.2\" \"datasets==2.9.0\" \"accelerate==0.17.1\" \"evaluate==0.4.0\" \"bitsandbytes==0.37.1\" loralib --upgrade --quiet\n",
        "# install additional dependencies needed for training\n",
        "!pip install rouge-score tensorboard py7zr"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7_s4srVwa7oI"
      },
      "source": [
        "### 2. Load and prepare the dataset\n",
        "we will use the [samsum](https://huggingface.co/datasets/samsum) dataset, a collection of about 16k messenger-like conversations with summaries. Conversations were created and written down by linguists fluent in English."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "sBu-jJ-XbArS"
      },
      "source": [
        "Dataset : \n",
        "```json\n",
        "{\n",
        "  \"id\": \"13818513\",\n",
        "  \"summary\": \"Amanda baked cookies and will bring Jerry some tomorrow.\",\n",
        "  \"dialogue\": \"Amanda: I baked cookies. Do you want some?\\r\\nJerry: Sure!\\r\\nAmanda: I'll bring you tomorrow :-)\"\n",
        "}\n",
        "```\n",
        "\n",
        "To load the samsum dataset, we use the load_dataset() method from the 🤗 Datasets library."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 101,
          "referenced_widgets": [
            "53f8a742f41a45c69b999bdc475ce430",
            "aa70a2ee95b64d129ce2eeb5a247bb24",
            "b108f9f3d55c4983aa342465ddd2bf96",
            "e5bb6b1babce463687f5e75ef56c468f",
            "01d6ddac108a46f0b34a8ebc6ecb4a33",
            "258b6cf54bb24a4593da93c79d5e535e",
            "e8f8c2a564094f6c9e984d1de7b4a536",
            "9f6bac398f734e1c9beb259289baf49b",
            "dbf8d79e1bcf41c7aacf805475f593a3",
            "22788242a4154607be60109abc2e1c88",
            "9a69f1452bb34a6495ebfe376c7930ea"
          ]
        },
        "id": "fXdqfxW0aElc",
        "outputId": "6960417d-0800-42a4-de7b-1c7101f693fa"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "WARNING:datasets.builder:Found cached dataset samsum (/root/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e)\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "53f8a742f41a45c69b999bdc475ce430",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "  0%|          | 0/3 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Train dataset size: 14732\n",
            "Test dataset size: 819\n"
          ]
        }
      ],
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "# Load dataset from the hub\n",
        "dataset = load_dataset(\"samsum\")\n",
        "\n",
        "print(f\"Train dataset size: {len(dataset['train'])}\")\n",
        "print(f\"Test dataset size: {len(dataset['test'])}\")\n",
        "\n",
        "# Train dataset size: 14732\n",
        "# Test dataset size: 819"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "C2D26979bofW"
      },
      "source": [
        "To train our model, we need to convert our inputs (text) to token IDs. This is done by a 🤗 Transformers Tokenizer. If you are not sure what this means, check out [chapter 6](https://huggingface.co/course/chapter6/1?fw=tf) of the Hugging Face Course."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "id": "kI0hfJ31bjrO"
      },
      "outputs": [],
      "source": [
        "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
        "\n",
        "model_id=\"google/flan-t5-xxl\"\n",
        "\n",
        "# Load tokenizer of FLAN-t5-XL\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_id)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "R-mMGCbjb0VE"
      },
      "source": [
        "Before we can start training, we need to preprocess our data. Abstractive Summarization is a text-generation task. Our model will take a text as input and generate a summary as output. We want to understand how long our input and output will take to batch our data efficiently."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 121,
          "referenced_widgets": [
            "c122efad9a644f6a8a4d686f9707ece7",
            "b8736b2ef5fc4bbba9a1946d0d1951c3",
            "fbf0168e87974d47a35a13bf2b66a72d",
            "1497de94cc784e6aaae149c257f12615",
            "b5ef42051d9e46fb9537972e2c645663",
            "48f41cd576454f538d83ab27b50034de",
            "9e8c25dffdfa431b8df9ee5f9fffafa6",
            "fc8a9c5ca08e491d883ce8006d200004",
            "f070e098fab84c50b7223d6424a9d369",
            "26c55b8aa6d44de7b2d3f61fc7c1d92d",
            "79fda5de6f6543f5aa35e5c7e2cd4513"
          ]
        },
        "id": "d7mRRHdIbv5O",
        "outputId": "45dede71-ef2c-48e5-f78e-696d8729dff7"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e/cache-28f9652739780da5.arrow\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Max source length: 255\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "c122efad9a644f6a8a4d686f9707ece7",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "  0%|          | 0/16 [00:00<?, ?ba/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Max target length: 50\n"
          ]
        }
      ],
      "source": [
        "from datasets import concatenate_datasets\n",
        "import numpy as np\n",
        "# The maximum total input sequence length after tokenization.\n",
        "# Sequences longer than this will be truncated, sequences shorter will be padded.\n",
        "tokenized_inputs = concatenate_datasets([dataset[\"train\"], dataset[\"test\"]]).map(lambda x: tokenizer(x[\"dialogue\"], truncation=True), batched=True, remove_columns=[\"dialogue\", \"summary\"])\n",
        "input_lenghts = [len(x) for x in tokenized_inputs[\"input_ids\"]]\n",
        "# take 85 percentile of max length for better utilization\n",
        "max_source_length = int(np.percentile(input_lenghts, 85))\n",
        "print(f\"Max source length: {max_source_length}\")\n",
        "\n",
        "# The maximum total sequence length for target text after tokenization.\n",
        "# Sequences longer than this will be truncated, sequences shorter will be padded.\"\n",
        "tokenized_targets = concatenate_datasets([dataset[\"train\"], dataset[\"test\"]]).map(lambda x: tokenizer(x[\"summary\"], truncation=True), batched=True, remove_columns=[\"dialogue\", \"summary\"])\n",
        "target_lenghts = [len(x) for x in tokenized_targets[\"input_ids\"]]\n",
        "# take 90 percentile of max length for better utilization\n",
        "max_target_length = int(np.percentile(target_lenghts, 90))\n",
        "print(f\"Max target length: {max_target_length}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0llAPTDXb8mv"
      },
      "source": [
        "We preprocess our dataset before training and save it to disk. You could run this step on your local machine or a CPU and upload it to the [HuggingFaceHub](https://huggingface.co/docs/hub/datasets-overview)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 153,
          "referenced_widgets": [
            "93b8752310254f9d97ef252e1fdd7c22",
            "578f354fa89f4f3e9a93fd8ee248c9ee",
            "b9d0cd91e7b248358919101528b22ec7",
            "6e57ad2848fe4f2bb0f89c79e4a41f54",
            "149fed4dd0af42cbac6ecf91aae96a95",
            "f1201bf45509432693eb958d61f2e9cf",
            "e23c50a9f877446f8a185d998929b07c",
            "c5f944d2f84841b1b8e4dcdeb8ab7e0d",
            "c4d2cbe030564618b312315e116bc3dd",
            "7a452b361c9147fdb46bd0cdd3ab8ec2",
            "cd993d7aaabb417e9523edcb036ae58c",
            "45f9d0d5af264eb7849c4d0f54ad9bda",
            "0f7df3565b9c490fbf160dee66f072bf",
            "22c63ed3758f47ef97a6606913493e16",
            "e194a109192e4451b27fa84c4c62d035",
            "e50462043a494cb4adbee14a2d22a98a",
            "aaf431dcd5f74b15ac953a1cc78f9eba",
            "1bef78565e514c93a63f052972dff5c8",
            "be5d717b5eeb438b902c23454a5add5e",
            "09391d8b737a47ed859a7d985c108272",
            "95272f282a1742618ec5fecae87ba97e",
            "2d5c032c012942b5af8f54a0cda4bc57",
            "866d10c8593a48b191625f8e88ea37bc",
            "c38113fe88314af5b73495d2869b0baf",
            "09308312030d4d75a2c13ac59becafa9",
            "ea88c4ce38584d2982fe4976b3b2e9dc",
            "2994e876d1b94608b1ddd95c10f105d3",
            "8ff7f533460c480d8828bfdcfe313d7b",
            "78f28b924827405c88e26fb3bb7babab",
            "0b46156d7bef45dc98b414cfa59c58f5",
            "83d7c0439c504879a2e27d24b6972038",
            "bb25b0588d4149a2b9c9dbeb327b2a33",
            "8f91724f405942a59d0ad87a887095da"
          ]
        },
        "id": "S5Tll2vXb2FV",
        "outputId": "da198d7d-3346-45e3-abbc-3c73970fc89a"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e/cache-843fdeabfea77c89.arrow\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "93b8752310254f9d97ef252e1fdd7c22",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "  0%|          | 0/1 [00:00<?, ?ba/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e/cache-8aa88cb10112f4aa.arrow\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Keys of tokenized dataset: ['input_ids', 'attention_mask', 'labels']\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "45f9d0d5af264eb7849c4d0f54ad9bda",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Saving the dataset (0/1 shards):   0%|          | 0/14732 [00:00<?, ? examples/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "866d10c8593a48b191625f8e88ea37bc",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Saving the dataset (0/1 shards):   0%|          | 0/819 [00:00<?, ? examples/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "def preprocess_function(sample,padding=\"max_length\"):\n",
        "    # add prefix to the input for t5\n",
        "    inputs = [\"summarize: \" + item for item in sample[\"dialogue\"]]\n",
        "\n",
        "    # tokenize inputs\n",
        "    model_inputs = tokenizer(inputs, max_length=max_source_length, padding=padding, truncation=True)\n",
        "\n",
        "    # Tokenize targets with the `text_target` keyword argument\n",
        "    labels = tokenizer(text_target=sample[\"summary\"], max_length=max_target_length, padding=padding, truncation=True)\n",
        "\n",
        "    # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore\n",
        "    # padding in the loss.\n",
        "    if padding == \"max_length\":\n",
        "        labels[\"input_ids\"] = [\n",
        "            [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels[\"input_ids\"]\n",
        "        ]\n",
        "\n",
        "    model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
        "    return model_inputs\n",
        "\n",
        "tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=[\"dialogue\", \"summary\", \"id\"])\n",
        "print(f\"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}\")\n",
        "\n",
        "# save datasets to disk for later easy loading\n",
        "tokenized_dataset[\"train\"].save_to_disk(\"data/train\")\n",
        "tokenized_dataset[\"test\"].save_to_disk(\"data/eval\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UnX--IxIcD5a"
      },
      "source": [
        "### 3. Fine-Tune T5 with LoRA and bnb int-8\n",
        "\n",
        "In addition to the LoRA technique, we will use [bitsanbytes.LLM.int8()](https://huggingface.co/blog/hf-bitsandbytes-integration)  to quantize out frozen LLM to int8. This allows us to reduce the needed memory for FLAN-T5 XXL ~4x.\n",
        "\n",
        "The first step of our training is to load the model. We are going to use [philschmid/flan-t5-xxl-sharded-fp16](https://huggingface.co/philschmid/flan-t5-xxl-sharded-fp16), which is a sharded version of [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl). The sharding will help us to not run off of memory when loading the model."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {
        "id": "ABb1yRDQcBkv"
      },
      "outputs": [],
      "source": [
        "from transformers import AutoModelForSeq2SeqLM\n",
        "\n",
        "# huggingface hub model id\n",
        "model_id = \"philschmid/flan-t5-xxl-sharded-fp16\"\n",
        "\n",
        "# load model from the hub\n",
        "model = AutoModelForSeq2SeqLM.from_pretrained(model_id, load_in_8bit=True, device_map=\"auto\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vCe0U1yUcml5"
      },
      "source": [
        "Now, we can prepare our model for the LoRA int-8 training using `peft`."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "id": "YOz5D6jfcglr"
      },
      "outputs": [],
      "source": [
        "from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training, TaskType\n",
        "\n",
        "# Define LoRA Config\n",
        "lora_config = LoraConfig(\n",
        " r=16,\n",
        " lora_alpha=32,\n",
        " target_modules=[\"q\", \"v\"],\n",
        " lora_dropout=0.05,\n",
        " bias=\"none\",\n",
        " task_type=TaskType.SEQ_2_SEQ_LM\n",
        ")\n",
        "# prepare int-8 model for training\n",
        "model = prepare_model_for_int8_training(model)\n",
        "\n",
        "# add LoRA adaptor\n",
        "model = get_peft_model(model, lora_config)\n",
        "model.print_trainable_parameters()\n",
        "\n",
        "# trainable params: 18874368 || all params: 11154206720 || trainable%: 0.16921300163961817"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1_5a798actse"
      },
      "source": [
        "As you can see, here we are only training 0.16% of the parameters of the model! This huge memory gain will enable us to fine-tune the model without memory issues.\n",
        "\n",
        "Next is to create a `DataCollator` that will take care of padding our inputs and labels. We will use the `DataCollatorForSeq2Seq` from the 🤗 Transformers library."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {
        "id": "fjtBoOaDcpbq"
      },
      "outputs": [],
      "source": [
        "from transformers import DataCollatorForSeq2Seq\n",
        "\n",
        "# we want to ignore tokenizer pad token in the loss\n",
        "label_pad_token_id = -100\n",
        "# Data collator\n",
        "data_collator = DataCollatorForSeq2Seq(\n",
        "    tokenizer,\n",
        "    model=model,\n",
        "    label_pad_token_id=label_pad_token_id,\n",
        "    pad_to_multiple_of=8\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "TWDjJ3hKc3_m"
      },
      "source": [
        "The last step is to define the hyperparameters `(TrainingArguments)` we want to use for our training."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {
        "id": "dGhpERc8c2mp"
      },
      "outputs": [],
      "source": [
        "from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments\n",
        "\n",
        "output_dir=\"lora-flan-t5-xxl\"\n",
        "\n",
        "# Define training args\n",
        "training_args = Seq2SeqTrainingArguments(\n",
        "    output_dir=output_dir,\n",
        "\tauto_find_batch_size=True,\n",
        "    learning_rate=1e-3, # higher learning rate\n",
        "    num_train_epochs=5,\n",
        "    logging_dir=f\"{output_dir}/logs\",\n",
        "    logging_strategy=\"steps\",\n",
        "    logging_steps=500,\n",
        "    save_strategy=\"no\",\n",
        "    report_to=\"tensorboard\",\n",
        ")\n",
        "\n",
        "# Create Trainer instance\n",
        "trainer = Seq2SeqTrainer(\n",
        "    model=model,\n",
        "    args=training_args,\n",
        "    data_collator=data_collator,\n",
        "    train_dataset=tokenized_dataset[\"train\"],\n",
        ")\n",
        "model.config.use_cache = False  # silence the warnings. Please re-enable for inference!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "N-93Xyb4dV-r"
      },
      "source": [
        "Let's now train our model and run the cells below. Note that for T5, some layers are kept in `float32` for stability purposes.\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 20,
      "metadata": {
        "id": "8iYaXCRkdbox"
      },
      "outputs": [],
      "source": [
        "# train model\n",
        "trainer.train()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RERkk6cCelb1"
      },
      "source": [
        "For comparison a full [fine-tuning on FLAN-T5-XXL](https://www.philschmid.de/fine-tune-flan-t5-deepspeed#3-results--experiments) with the same duration 10h requires 8x A100 40GBs and costs ~322$.\n",
        "\n",
        "We can save our model to use it for inference and evaluate it. We will save it to disk for now, but you could also upload it to the [Hugging Face Hub](https://huggingface.co/docs/hub/main) using the model.push_to_hub method."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 21,
      "metadata": {
        "id": "ZrKOWJ40d6RQ"
      },
      "outputs": [],
      "source": [
        "# Save our LoRA model & tokenizer results\n",
        "peft_model_id=\"results\"\n",
        "trainer.model.save_pretrained(peft_model_id)\n",
        "tokenizer.save_pretrained(peft_model_id)\n",
        "# if you want to save the base model to call\n",
        "# trainer.model.base_model.save_pretrained(peft_model_id)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GlBfJNZrfbrH"
      },
      "source": [
        "Our LoRA checkpoint is only 84MB small and includes all of the learnt knowleddge for samsum."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "d9bmu_s4fej3"
      },
      "source": [
        "### 4. Evaluate & run Inference with LoRA FLAN-T5  \n",
        "We are going to use `evaluate` library to evaluate the `rogue` score. We can run inference using `PEFT` and `transformers`. For our FLAN-T5 XXL model, we need at least 18GB of GPU memory."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 23,
      "metadata": {
        "id": "gw8GrAa4fcAQ"
      },
      "outputs": [],
      "source": [
        "import torch\n",
        "from peft import PeftModel, PeftConfig\n",
        "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
        "\n",
        "# Load peft config for pre-trained checkpoint etc.\n",
        "peft_model_id = \"results\"\n",
        "config = PeftConfig.from_pretrained(peft_model_id)\n",
        "\n",
        "# load base LLM model and tokenizer\n",
        "model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path,  load_in_8bit=True,  device_map={\"\":0})\n",
        "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
        "\n",
        "# Load the Lora model\n",
        "model = PeftModel.from_pretrained(model, peft_model_id, device_map={\"\":0})\n",
        "model.eval()\n",
        "\n",
        "print(\"Peft model loaded\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0XGJ5QG2frXZ"
      },
      "source": [
        "Let’s load the dataset again with a random sample to try the summarization."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 22,
      "metadata": {
        "id": "T3eOOkk8frqa"
      },
      "outputs": [],
      "source": [
        "from datasets import load_dataset\n",
        "from random import randrange\n",
        "\n",
        "\n",
        "# Load dataset from the hub and get a sample\n",
        "dataset = load_dataset(\"samsum\")\n",
        "sample = dataset['test'][randrange(len(dataset[\"test\"]))]\n",
        "\n",
        "input_ids = tokenizer(sample[\"dialogue\"], return_tensors=\"pt\", truncation=True).input_ids.cuda()\n",
        "# with torch.inference_mode():\n",
        "outputs = model.generate(input_ids=input_ids, max_new_tokens=10, do_sample=True, top_p=0.9)\n",
        "print(f\"input sentence: {sample['dialogue']}\\n{'---'* 20}\")\n",
        "\n",
        "print(f\"summary:\\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]}\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hxHH7xIefwHO"
      },
      "source": [
        "Nice! our model works! Now, lets take a closer look and evaluate it against the test set of processed dataset from samsum. Therefore we need to use and create some utilities to generate the summaries and group them together. The most commonly used metrics to evaluate summarization task is [rogue_score](https://en.wikipedia.org/wiki/ROUGE_(metric) short for Recall-Oriented Understudy for Gisting Evaluation). This metric does not behave like the standard accuracy: it will compare a generated summary against a set of reference summaries."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 24,
      "metadata": {
        "id": "tuxt83Ydfs9B"
      },
      "outputs": [],
      "source": [
        "import evaluate\n",
        "import numpy as np\n",
        "from datasets import load_from_disk\n",
        "from tqdm import tqdm\n",
        "\n",
        "# Metric\n",
        "metric = evaluate.load(\"rouge\")\n",
        "\n",
        "def evaluate_peft_model(sample,max_target_length=50):\n",
        "    # generate summary\n",
        "    outputs = model.generate(input_ids=sample[\"input_ids\"].unsqueeze(0).cuda(), do_sample=True, top_p=0.9, max_new_tokens=max_target_length)\n",
        "    prediction = tokenizer.decode(outputs[0].detach().cpu().numpy(), skip_special_tokens=True)\n",
        "    # decode eval sample\n",
        "    # Replace -100 in the labels as we can't decode them.\n",
        "    labels = np.where(sample['labels'] != -100, sample['labels'], tokenizer.pad_token_id)\n",
        "    labels = tokenizer.decode(labels, skip_special_tokens=True)\n",
        "\n",
        "    # Some simple post-processing\n",
        "    return prediction, labels\n",
        "\n",
        "# load test dataset from distk\n",
        "test_dataset = load_from_disk(\"data/eval/\").with_format(\"torch\")\n",
        "\n",
        "# run predictions\n",
        "# this can take ~45 minutes\n",
        "predictions, references = [] , []\n",
        "for sample in tqdm(test_dataset):\n",
        "    p,l = evaluate_peft_model(sample)\n",
        "    predictions.append(p)\n",
        "    references.append(l)\n",
        "\n",
        "# compute metric\n",
        "rogue = metric.compute(predictions=predictions, references=references, use_stemmer=True)\n",
        "\n",
        "# print results\n",
        "print(f\"Rogue1: {rogue['rouge1']* 100:2f}%\")\n",
        "print(f\"rouge2: {rogue['rouge2']* 100:2f}%\")\n",
        "print(f\"rougeL: {rogue['rougeL']* 100:2f}%\")\n",
        "print(f\"rougeLsum: {rogue['rougeLsum']* 100:2f}%\")\n",
        "\n",
        "# Rogue1: 50.386161%\n",
        "# rouge2: 24.842412%\n",
        "# rougeL: 41.370130%\n",
        "# rougeLsum: 41.394230%"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "oOcHVuDvf6H4"
      },
      "source": [
        "Our PEFT fine-tuned FLAN-T5-XXL achieved a rogue1 score of 50.38% on the test dataset. For comparison a [full fine-tuning of flan-t5-base achieved a rouge1 score of 47.23](https://www.philschmid.de/fine-tune-flan-t5). That is a 3% improvements.\n",
        "\n",
        "It is incredible to see that our LoRA checkpoint is only `84MB` small and model achieves better performance than a smaller fully fine-tuned model.\n",
        "\n"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.11"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "01d6ddac108a46f0b34a8ebc6ecb4a33": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "09308312030d4d75a2c13ac59becafa9": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0b46156d7bef45dc98b414cfa59c58f5",
            "max": 819,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_83d7c0439c504879a2e27d24b6972038",
            "value": 819
          }
        },
        "09391d8b737a47ed859a7d985c108272": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "0b46156d7bef45dc98b414cfa59c58f5": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0f7df3565b9c490fbf160dee66f072bf": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_aaf431dcd5f74b15ac953a1cc78f9eba",
            "placeholder": "​",
            "style": "IPY_MODEL_1bef78565e514c93a63f052972dff5c8",
            "value": "Saving the dataset (1/1 shards): 100%"
          }
        },
        "1497de94cc784e6aaae149c257f12615": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_26c55b8aa6d44de7b2d3f61fc7c1d92d",
            "placeholder": "​",
            "style": "IPY_MODEL_79fda5de6f6543f5aa35e5c7e2cd4513",
            "value": " 16/16 [00:03&lt;00:00,  4.47ba/s]"
          }
        },
        "149fed4dd0af42cbac6ecf91aae96a95": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1bef78565e514c93a63f052972dff5c8": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "22788242a4154607be60109abc2e1c88": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "22c63ed3758f47ef97a6606913493e16": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_be5d717b5eeb438b902c23454a5add5e",
            "max": 14732,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_09391d8b737a47ed859a7d985c108272",
            "value": 14732
          }
        },
        "258b6cf54bb24a4593da93c79d5e535e": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "26c55b8aa6d44de7b2d3f61fc7c1d92d": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2994e876d1b94608b1ddd95c10f105d3": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": "hidden",
            "width": null
          }
        },
        "2d5c032c012942b5af8f54a0cda4bc57": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "45f9d0d5af264eb7849c4d0f54ad9bda": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_0f7df3565b9c490fbf160dee66f072bf",
              "IPY_MODEL_22c63ed3758f47ef97a6606913493e16",
              "IPY_MODEL_e194a109192e4451b27fa84c4c62d035"
            ],
            "layout": "IPY_MODEL_e50462043a494cb4adbee14a2d22a98a"
          }
        },
        "48f41cd576454f538d83ab27b50034de": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "53f8a742f41a45c69b999bdc475ce430": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_aa70a2ee95b64d129ce2eeb5a247bb24",
              "IPY_MODEL_b108f9f3d55c4983aa342465ddd2bf96",
              "IPY_MODEL_e5bb6b1babce463687f5e75ef56c468f"
            ],
            "layout": "IPY_MODEL_01d6ddac108a46f0b34a8ebc6ecb4a33"
          }
        },
        "578f354fa89f4f3e9a93fd8ee248c9ee": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f1201bf45509432693eb958d61f2e9cf",
            "placeholder": "​",
            "style": "IPY_MODEL_e23c50a9f877446f8a185d998929b07c",
            "value": "100%"
          }
        },
        "6e57ad2848fe4f2bb0f89c79e4a41f54": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7a452b361c9147fdb46bd0cdd3ab8ec2",
            "placeholder": "​",
            "style": "IPY_MODEL_cd993d7aaabb417e9523edcb036ae58c",
            "value": " 1/1 [00:01&lt;00:00,  1.00s/ba]"
          }
        },
        "78f28b924827405c88e26fb3bb7babab": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "79fda5de6f6543f5aa35e5c7e2cd4513": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7a452b361c9147fdb46bd0cdd3ab8ec2": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "83d7c0439c504879a2e27d24b6972038": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "866d10c8593a48b191625f8e88ea37bc": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c38113fe88314af5b73495d2869b0baf",
              "IPY_MODEL_09308312030d4d75a2c13ac59becafa9",
              "IPY_MODEL_ea88c4ce38584d2982fe4976b3b2e9dc"
            ],
            "layout": "IPY_MODEL_2994e876d1b94608b1ddd95c10f105d3"
          }
        },
        "8f91724f405942a59d0ad87a887095da": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8ff7f533460c480d8828bfdcfe313d7b": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "93b8752310254f9d97ef252e1fdd7c22": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_578f354fa89f4f3e9a93fd8ee248c9ee",
              "IPY_MODEL_b9d0cd91e7b248358919101528b22ec7",
              "IPY_MODEL_6e57ad2848fe4f2bb0f89c79e4a41f54"
            ],
            "layout": "IPY_MODEL_149fed4dd0af42cbac6ecf91aae96a95"
          }
        },
        "95272f282a1742618ec5fecae87ba97e": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9a69f1452bb34a6495ebfe376c7930ea": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9e8c25dffdfa431b8df9ee5f9fffafa6": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9f6bac398f734e1c9beb259289baf49b": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "aa70a2ee95b64d129ce2eeb5a247bb24": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_258b6cf54bb24a4593da93c79d5e535e",
            "placeholder": "​",
            "style": "IPY_MODEL_e8f8c2a564094f6c9e984d1de7b4a536",
            "value": "100%"
          }
        },
        "aaf431dcd5f74b15ac953a1cc78f9eba": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b108f9f3d55c4983aa342465ddd2bf96": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9f6bac398f734e1c9beb259289baf49b",
            "max": 3,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_dbf8d79e1bcf41c7aacf805475f593a3",
            "value": 3
          }
        },
        "b5ef42051d9e46fb9537972e2c645663": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b8736b2ef5fc4bbba9a1946d0d1951c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_48f41cd576454f538d83ab27b50034de",
            "placeholder": "​",
            "style": "IPY_MODEL_9e8c25dffdfa431b8df9ee5f9fffafa6",
            "value": "100%"
          }
        },
        "b9d0cd91e7b248358919101528b22ec7": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c5f944d2f84841b1b8e4dcdeb8ab7e0d",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_c4d2cbe030564618b312315e116bc3dd",
            "value": 1
          }
        },
        "bb25b0588d4149a2b9c9dbeb327b2a33": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "be5d717b5eeb438b902c23454a5add5e": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c122efad9a644f6a8a4d686f9707ece7": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_b8736b2ef5fc4bbba9a1946d0d1951c3",
              "IPY_MODEL_fbf0168e87974d47a35a13bf2b66a72d",
              "IPY_MODEL_1497de94cc784e6aaae149c257f12615"
            ],
            "layout": "IPY_MODEL_b5ef42051d9e46fb9537972e2c645663"
          }
        },
        "c38113fe88314af5b73495d2869b0baf": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8ff7f533460c480d8828bfdcfe313d7b",
            "placeholder": "​",
            "style": "IPY_MODEL_78f28b924827405c88e26fb3bb7babab",
            "value": "Saving the dataset (1/1 shards): 100%"
          }
        },
        "c4d2cbe030564618b312315e116bc3dd": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "c5f944d2f84841b1b8e4dcdeb8ab7e0d": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cd993d7aaabb417e9523edcb036ae58c": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "dbf8d79e1bcf41c7aacf805475f593a3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e194a109192e4451b27fa84c4c62d035": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_95272f282a1742618ec5fecae87ba97e",
            "placeholder": "​",
            "style": "IPY_MODEL_2d5c032c012942b5af8f54a0cda4bc57",
            "value": " 14732/14732 [00:00&lt;00:00, 114690.83 examples/s]"
          }
        },
        "e23c50a9f877446f8a185d998929b07c": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e50462043a494cb4adbee14a2d22a98a": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": "hidden",
            "width": null
          }
        },
        "e5bb6b1babce463687f5e75ef56c468f": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_22788242a4154607be60109abc2e1c88",
            "placeholder": "​",
            "style": "IPY_MODEL_9a69f1452bb34a6495ebfe376c7930ea",
            "value": " 3/3 [00:00&lt;00:00,  9.80it/s]"
          }
        },
        "e8f8c2a564094f6c9e984d1de7b4a536": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ea88c4ce38584d2982fe4976b3b2e9dc": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bb25b0588d4149a2b9c9dbeb327b2a33",
            "placeholder": "​",
            "style": "IPY_MODEL_8f91724f405942a59d0ad87a887095da",
            "value": " 819/819 [00:00&lt;00:00, 23586.16 examples/s]"
          }
        },
        "f070e098fab84c50b7223d6424a9d369": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f1201bf45509432693eb958d61f2e9cf": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fbf0168e87974d47a35a13bf2b66a72d": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fc8a9c5ca08e491d883ce8006d200004",
            "max": 16,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f070e098fab84c50b7223d6424a9d369",
            "value": 16
          }
        },
        "fc8a9c5ca08e491d883ce8006d200004": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
